In [1]:
import pandas as pd
import plotly as pt
import plotly.express as px
%matplotlib inline
In [2]:
# Directory holding the train/test CSVs. Defined once so the location only
# has to be changed in one place when the notebook is run on another machine.
DATA_DIR = '~/practice/data-platform/liveProject/jupyter'

# Load both splits from DATA_DIR (the resulting paths are unchanged).
training_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
In [3]:
# Preview the first rows to confirm the two columns (mean, sd) loaded as expected.
training_df.head()
Out[3]:
mean sd
0 0.564877 0.231592
1 -0.118285 0.020446
2 -0.157953 -0.106749
3 -0.157953 -0.106749
4 -0.157953 -0.106242
In [4]:
# (rows, columns) of the training set.
training_df.shape
Out[4]:
(64227, 2)
In [5]:
# Per-column summary statistics; comparing max against the quartiles is a
# quick way to spot extreme values before plotting.
training_df.describe()
Out[5]:
mean sd
count 64227.000000 64227.000000
mean -0.002097 0.000773
std 0.953966 1.020192
min -0.284889 -0.106749
25% -0.157953 -0.106749
50% -0.157953 -0.106749
75% -0.135286 -0.106242
max 66.324763 57.676982
In [6]:
# Scatter of the two features against each other. The title lets the figure
# stand on its own when the notebook is skimmed; the call remains the cell's
# last expression so the figure is still displayed as the cell output.
px.scatter(
    data_frame=training_df,
    x='mean',
    y='sd',
    title='Training set: mean vs. sd',
)
In [28]:
# One histogram per feature, drawn by a single loop instead of copy-pasted
# calls. The describe() output above shows both columns have a max far above
# their 75th percentile, so most rows are expected to land in the lowest bin.
for column in ('mean', 'sd'):
    fig = px.histogram(
        training_df,
        x=column,
        nbins=20,
        title=f'Training set: distribution of {column}',
    )
    fig.show()
In [21]:
# One box plot per feature, drawn by a single loop instead of copy-pasted
# calls; each figure gets a title naming the column it summarises.
for column in ('mean', 'sd'):
    fig = px.box(
        training_df,
        y=column,
        title=f'Training set: box plot of {column}',
    )
    fig.show()
In [ ]: